* Start from an empty dataset in memory
clear 

* Set the working directory (uncomment and point to the project root);
* the relative folder names below are resolved against it
* cd "directory"

* Folder globals used by the rest of the script:
*   datafolder   - location of the input .dta files
*   outputfolder - destination of the exported figures
global datafolder "data"
global outputfolder "output"

********************************************************************************
*			Figure 2: sample distribution of log(assets) vs Amadeus
********************************************************************************
	* Figure 2: overlaid histograms of log(total assets) for the sample
	* (one observation per customerid) and for the Amadeus records in
	* financials.dta with closing year 2014.
	* Paths are written with forward slashes: Stata accepts them on every
	* platform, whereas a backslash works only on Windows and doubles as
	* the macro-escape character.
	use "$datafolder/maindata.dta", clear
	
	* NOTE(review): 660 equals tm(2015m1) if datem is a Stata monthly date - confirm
	keep if datem >= 660

	gen l_asset = log(TotalAssets)
	
	* One row per customer, holding its first non-missing log(assets)
	collapse (firstnm)  l_asset, by(customerid)

	* Flag the sample rows before the comparison data are appended
	gen sample= 1

	append using "$datafolder/financials.dta" 


	rename CLOSDATE_year year
	* Keep the 2014 comparison records plus every sample row
	keep if year==2014 | sample==1

	* Appended rows have no sample flag yet: mark them as the comparison group
	replace sample=0 if sample==.

	* Tag duplicate IDNR among the appended rows only. The sample rows carry
	* no IDNR (collapse kept just customerid and l_asset), so without the
	* restriction they would all be tagged as duplicates of one another.
	duplicates tag IDNR if sample==0, gen(dup)
	* NOTE(review): dup==1 covers only IDNRs that appear exactly twice; records
	* appearing three or more times are left untouched - confirm this is intended.
	* NOTE(review): MONTHS values 12 and 14 are both retained - confirm the 14.
	drop if dup==1 & MONTHS!=12 & MONTHS!=14
	drop dup 

	* Comparison rows take log(assets) from TOAS
	replace l_asset = log(TOAS) if sample==0

	twoway hist l_asset if sample==1, bin(25) color(cranberry%50) ///
	||   hist l_asset if sample==0, bin(50)  color(edkblue%50) ///
		graphregion(color(white))  xlabel(#10) ///
		ylabel(#5, angle(horizontal)) xtitle("log(Assets)") ///
		legend(label(1 "Sample") label(2 "Amadeus") ///
		region(lcolor(white) fcolor(none)) pos(6) rows(1))
		
		* Export the figure both as PDF and as PNG
		graph export "$outputfolder/figure_2.pdf", ///
		as(pdf) replace
		graph export "$outputfolder/figure_2.png", ///
		as(png) replace
	